From 71b7bc88341945f13f3951e2bb5fd247b639ff7a Mon Sep 17 00:00:00 2001
From: Mike Pall <mike>
Date: Sun, 3 Sep 2017 23:20:53 +0200
Subject: [PATCH] PPC: Add soft-float support to JIT compiler backend.

Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
Sponsored by Cisco Systems, Inc.
---
 src/lj_arch.h    |   1 -
 src/lj_asm_ppc.h | 321 ++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 278 insertions(+), 44 deletions(-)

--- a/src/lj_arch.h
+++ b/src/lj_arch.h
@@ -273,7 +273,6 @@
 #endif
 
 #if LJ_ABI_SOFTFP
-#define LJ_ARCH_NOJIT		1  /* NYI */
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL
 #else
 #define LJ_ARCH_NUMMODE		LJ_NUMMODE_DUAL_SINGLE
--- a/src/lj_asm_ppc.h
+++ b/src/lj_asm_ppc.h
@@ -226,6 +226,7 @@ static void asm_fusexrefx(ASMState *as,
   emit_tab(as, pi, rt, left, right);
 }
 
+#if !LJ_SOFTFP
 /* Fuse to multiply-add/sub instruction. */
 static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir)
 {
@@ -245,6 +246,7 @@ static int asm_fusemadd(ASMState *as, IR
   }
   return 0;
 }
+#endif
 
 /* -- Calls --------------------------------------------------------------- */
 
@@ -253,13 +255,17 @@ static void asm_gencall(ASMState *as, co
 {
   uint32_t n, nargs = CCI_XNARGS(ci);
   int32_t ofs = 8;
-  Reg gpr = REGARG_FIRSTGPR, fpr = REGARG_FIRSTFPR;
+  Reg gpr = REGARG_FIRSTGPR;
+#if !LJ_SOFTFP
+  Reg fpr = REGARG_FIRSTFPR;
+#endif
   if ((void *)ci->func)
     emit_call(as, (void *)ci->func);
   for (n = 0; n < nargs; n++) {  /* Setup args. */
     IRRef ref = args[n];
     if (ref) {
       IRIns *ir = IR(ref);
+#if !LJ_SOFTFP
       if (irt_isfp(ir->t)) {
 	if (fpr <= REGARG_LASTFPR) {
 	  lua_assert(rset_test(as->freeset, fpr));  /* Already evicted. */
@@ -271,7 +277,9 @@ static void asm_gencall(ASMState *as, co
 	  emit_spstore(as, ir, r, ofs);
 	  ofs += irt_isnum(ir->t) ? 8 : 4;
 	}
-      } else {
+      } else
+#endif
+      {
 	if (gpr <= REGARG_LASTGPR) {
 	  lua_assert(rset_test(as->freeset, gpr));  /* Already evicted. */
 	  ra_leftov(as, gpr, ref);
@@ -290,8 +298,10 @@ static void asm_gencall(ASMState *as, co
     }
     checkmclim(as);
   }
+#if !LJ_SOFTFP
   if ((ci->flags & CCI_VARARG))  /* Vararg calls need to know about FPR use. */
     emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6);
+#endif
 }
 
 /* Setup result reg/sp for call. Evict scratch regs. */
@@ -299,8 +309,10 @@ static void asm_setupresult(ASMState *as
 {
   RegSet drop = RSET_SCRATCH;
   int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
+#if !LJ_SOFTFP
   if ((ci->flags & CCI_NOFPRCLOBBER))
     drop &= ~RSET_FPR;
+#endif
   if (ra_hasreg(ir->r))
     rset_clear(drop, ir->r);  /* Dest reg handled below. */
   if (hiop && ra_hasreg((ir+1)->r))
@@ -308,7 +320,7 @@ static void asm_setupresult(ASMState *as
   ra_evictset(as, drop);  /* Evictions must be performed first. */
   if (ra_used(ir)) {
     lua_assert(!irt_ispri(ir->t));
-    if (irt_isfp(ir->t)) {
+    if (!LJ_SOFTFP && irt_isfp(ir->t)) {
       if ((ci->flags & CCI_CASTU64)) {
 	/* Use spill slot or temp slots. */
 	int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
@@ -377,6 +389,7 @@ static void asm_retf(ASMState *as, IRIns
 
 /* -- Type conversions ---------------------------------------------------- */
 
+#if !LJ_SOFTFP
 static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
 {
   RegSet allow = RSET_FPR;
@@ -409,15 +422,23 @@ static void asm_tobit(ASMState *as, IRIn
   emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP);
   emit_fab(as, PPCI_FADD, tmp, left, right);
 }
+#endif
 
 static void asm_conv(ASMState *as, IRIns *ir)
 {
   IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
+#if !LJ_SOFTFP
   int stfp = (st == IRT_NUM || st == IRT_FLOAT);
+#endif
   IRRef lref = ir->op1;
-  lua_assert(irt_type(ir->t) != st);
   lua_assert(!(irt_isint64(ir->t) ||
 	       (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
+#if LJ_SOFTFP
+  /* FP conversions are handled by SPLIT. */
+  lua_assert(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT));
+  /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */
+#else
+  lua_assert(irt_type(ir->t) != st);
   if (irt_isfp(ir->t)) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     if (stfp) {  /* FP to FP conversion. */
@@ -476,7 +497,9 @@ static void asm_conv(ASMState *as, IRIns
 	emit_fb(as, PPCI_FCTIWZ, tmp, left);
       }
     }
-  } else {
+  } else
+#endif
+  {
     Reg dest = ra_dest(as, ir, RSET_GPR);
     if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
       Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
@@ -496,17 +519,41 @@ static void asm_strto(ASMState *as, IRIn
 {
   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
   IRRef args[2];
-  int32_t ofs;
+  int32_t ofs = SPOFS_TMP;
+#if LJ_SOFTFP
+  ra_evictset(as, RSET_SCRATCH);
+  if (ra_used(ir)) {
+    if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) &&
+	(ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) {
+      int i;
+      for (i = 0; i < 2; i++) {
+	Reg r = (ir+i)->r;
+	if (ra_hasreg(r)) {
+	  ra_free(as, r);
+	  ra_modified(as, r);
+	  emit_spload(as, ir+i, r, sps_scale((ir+i)->s));
+	}
+      }
+      ofs = sps_scale(ir->s & ~1);
+    } else {
+      Reg rhi = ra_dest(as, ir+1, RSET_GPR);
+      Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi));
+      emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs);
+      emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4);
+    }
+  }
+#else
   RegSet drop = RSET_SCRATCH;
   if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
   ra_evictset(as, drop);
+  if (ir->s) ofs = sps_scale(ir->s);
+#endif
   asm_guardcc(as, CC_EQ);
   emit_ai(as, PPCI_CMPWI, RID_RET, 0);  /* Test return status. */
   args[0] = ir->op1;      /* GCstr *str */
   args[1] = ASMREF_TMP1;  /* TValue *n  */
   asm_gencall(as, ci, args);
   /* Store the result to the spill slot or temp slots. */
-  ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP;
   emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs);
 }
 
@@ -530,7 +577,10 @@ static void asm_tvptr(ASMState *as, Reg
       Reg src = ra_alloc1(as, ref, allow);
       emit_setgl(as, src, tmptv.gcr);
     }
-    type = ra_allock(as, irt_toitype(ir->t), allow);
+    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
+      type = ra_alloc1(as, ref+1, allow);
+    else
+      type = ra_allock(as, irt_toitype(ir->t), allow);
     emit_setgl(as, type, tmptv.it);
   }
 }
@@ -574,11 +624,27 @@ static void asm_href(ASMState *as, IRIns
   Reg tisnum = RID_NONE, tmpnum = RID_NONE;
   IRRef refkey = ir->op2;
   IRIns *irkey = IR(refkey);
+  int isk = irref_isk(refkey);
   IRType1 kt = irkey->t;
   uint32_t khash;
   MCLabel l_end, l_loop, l_next;
 
   rset_clear(allow, tab);
+#if LJ_SOFTFP
+  if (!isk) {
+    key = ra_alloc1(as, refkey, allow);
+    rset_clear(allow, key);
+    if (irkey[1].o == IR_HIOP) {
+      if (ra_hasreg((irkey+1)->r)) {
+	tmpnum = (irkey+1)->r;
+	ra_noweak(as, tmpnum);
+      } else {
+	tmpnum = ra_allocref(as, refkey+1, allow);
+      }
+      rset_clear(allow, tmpnum);
+    }
+  }
+#else
   if (irt_isnum(kt)) {
     key = ra_alloc1(as, refkey, RSET_FPR);
     tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
@@ -588,6 +654,7 @@ static void asm_href(ASMState *as, IRIns
     key = ra_alloc1(as, refkey, allow);
     rset_clear(allow, key);
   }
+#endif
   tmp2 = ra_scratch(as, allow);
   rset_clear(allow, tmp2);
 
@@ -610,7 +677,7 @@ static void asm_href(ASMState *as, IRIns
     asm_guardcc(as, CC_EQ);
   else
     emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
-  if (irt_isnum(kt)) {
+  if (!LJ_SOFTFP && irt_isnum(kt)) {
     emit_fab(as, PPCI_FCMPU, 0, tmpnum, key);
     emit_condbranch(as, PPCI_BC, CC_GE, l_next);
     emit_ab(as, PPCI_CMPLW, tmp1, tisnum);
@@ -620,7 +687,10 @@ static void asm_href(ASMState *as, IRIns
       emit_ab(as, PPCI_CMPW, tmp2, key);
       emit_condbranch(as, PPCI_BC, CC_NE, l_next);
     }
-    emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
+    if (LJ_SOFTFP && ra_hasreg(tmpnum))
+      emit_ab(as, PPCI_CMPW, tmp1, tmpnum);
+    else
+      emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t));
     if (!irt_ispri(kt))
       emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
   }
@@ -629,19 +699,19 @@ static void asm_href(ASMState *as, IRIns
 	    (((char *)as->mcp-(char *)l_loop) & 0xffffu);
 
   /* Load main position relative to tab->node into dest. */
-  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
+  khash = isk ? ir_khash(irkey) : 1;
   if (khash == 0) {
     emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
   } else {
     Reg tmphash = tmp1;
-    if (irref_isk(refkey))
+    if (isk)
       tmphash = ra_allock(as, khash, allow);
     emit_tab(as, PPCI_ADD, dest, dest, tmp1);
     emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node));
     emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash);
     emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node));
     emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
-    if (irref_isk(refkey)) {
+    if (isk) {
       /* Nothing to do. */
     } else if (irt_isstr(kt)) {
       emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, hash));
@@ -651,13 +721,19 @@ static void asm_href(ASMState *as, IRIns
       emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2);
       emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31);
       emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2);
-      if (irt_isnum(kt)) {
+      if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) {
+#if LJ_SOFTFP
+	emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
+	emit_rotlwi(as, dest, tmp1, HASH_ROT1);
+	emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum);
+#else
 	int32_t ofs = ra_spill(as, irkey);
 	emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1);
 	emit_rotlwi(as, dest, tmp1, HASH_ROT1);
 	emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1);
 	emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4);
 	emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs);
+#endif
       } else {
 	emit_asb(as, PPCI_XOR, tmp2, key, tmp1);
 	emit_rotlwi(as, dest, tmp1, HASH_ROT1);
@@ -784,8 +860,8 @@ static PPCIns asm_fxloadins(IRIns *ir)
   case IRT_U8: return PPCI_LBZ;
   case IRT_I16: return PPCI_LHA;
   case IRT_U16: return PPCI_LHZ;
-  case IRT_NUM: return PPCI_LFD;
-  case IRT_FLOAT: return PPCI_LFS;
+  case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_LFD;
+  case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS;
   default: return PPCI_LWZ;
   }
 }
@@ -795,8 +871,8 @@ static PPCIns asm_fxstoreins(IRIns *ir)
   switch (irt_type(ir->t)) {
   case IRT_I8: case IRT_U8: return PPCI_STB;
   case IRT_I16: case IRT_U16: return PPCI_STH;
-  case IRT_NUM: return PPCI_STFD;
-  case IRT_FLOAT: return PPCI_STFS;
+  case IRT_NUM: lua_assert(!LJ_SOFTFP); return PPCI_STFD;
+  case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS;
   default: return PPCI_STW;
   }
 }
@@ -839,7 +915,8 @@ static void asm_fstore(ASMState *as, IRI
 
 static void asm_xload(ASMState *as, IRIns *ir)
 {
-  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+  Reg dest = ra_dest(as, ir,
+    (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
   lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
   if (irt_isi8(ir->t))
     emit_as(as, PPCI_EXTSB, dest, dest);
@@ -857,7 +934,8 @@ static void asm_xstore_(ASMState *as, IR
     Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
     asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
   } else {
-    Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
+    Reg src = ra_alloc1(as, ir->op2,
+      (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
     asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
 		 rset_exclude(RSET_GPR, src), ofs);
   }
@@ -871,10 +949,19 @@ static void asm_ahuvload(ASMState *as, I
   Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
   RegSet allow = RSET_GPR;
   int32_t ofs = AHUREF_LSX;
+  if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
+    t.irt = IRT_NUM;
+    if (ra_used(ir+1)) {
+      type = ra_dest(as, ir+1, allow);
+      rset_clear(allow, type);
+    }
+    ofs = 0;
+  }
   if (ra_used(ir)) {
-    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
-    if (!irt_isnum(t)) ofs = 0;
-    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+    lua_assert((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
+	       irt_isint(ir->t) || irt_isaddr(ir->t));
+    if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
+    dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
     rset_clear(allow, dest);
   }
   idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
@@ -883,12 +970,13 @@ static void asm_ahuvload(ASMState *as, I
     asm_guardcc(as, CC_GE);
     emit_ab(as, PPCI_CMPLW, type, tisnum);
     if (ra_hasreg(dest)) {
-      if (ofs == AHUREF_LSX) {
+      if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
 	tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
 						       (idx&255)), (idx>>8)));
 	emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
       } else {
-	emit_fai(as, PPCI_LFD, dest, idx, ofs);
+	emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
+		 ofs+4*LJ_SOFTFP);
       }
     }
   } else {
@@ -911,7 +999,7 @@ static void asm_ahustore(ASMState *as, I
   int32_t ofs = AHUREF_LSX;
   if (ir->r == RID_SINK)
     return;
-  if (irt_isnum(ir->t)) {
+  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
     src = ra_alloc1(as, ir->op2, RSET_FPR);
   } else {
     if (!irt_ispri(ir->t)) {
@@ -919,11 +1007,14 @@ static void asm_ahustore(ASMState *as, I
       rset_clear(allow, src);
       ofs = 0;
     }
-    type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
+    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
+      type = ra_alloc1(as, (ir+1)->op2, allow);
+    else
+      type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
     rset_clear(allow, type);
   }
   idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
-  if (irt_isnum(ir->t)) {
+  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
     if (ofs == AHUREF_LSX) {
       emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
       emit_slwi(as, RID_TMP, (idx>>8), 3);
@@ -948,21 +1039,33 @@ static void asm_sload(ASMState *as, IRIn
   IRType1 t = ir->t;
   Reg dest = RID_NONE, type = RID_NONE, base;
   RegSet allow = RSET_GPR;
+  int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
+  if (hiop)
+    t.irt = IRT_NUM;
   lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
-  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
+  lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK));
   lua_assert(LJ_DUALNUM ||
 	     !irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
+#if LJ_SOFTFP
+  lua_assert(!(ir->op2 & IRSLOAD_CONVERT));  /* Handled by LJ_SOFTFP SPLIT. */
+  if (hiop && ra_used(ir+1)) {
+    type = ra_dest(as, ir+1, allow);
+    rset_clear(allow, type);
+  }
+#else
   if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
     dest = ra_scratch(as, RSET_FPR);
     asm_tointg(as, ir, dest);
     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
-  } else if (ra_used(ir)) {
+  } else
+#endif
+  if (ra_used(ir)) {
     lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
-    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
+    dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
     rset_clear(allow, dest);
     base = ra_alloc1(as, REF_BASE, allow);
     rset_clear(allow, base);
-    if ((ir->op2 & IRSLOAD_CONVERT)) {
+    if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
       if (irt_isint(t)) {
 	emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
 	dest = ra_scratch(as, RSET_FPR);
@@ -994,10 +1097,13 @@ dotypecheck:
     if ((ir->op2 & IRSLOAD_TYPECHECK)) {
       Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
       asm_guardcc(as, CC_GE);
-      emit_ab(as, PPCI_CMPLW, RID_TMP, tisnum);
+#if !LJ_SOFTFP
       type = RID_TMP;
+#endif
+      emit_ab(as, PPCI_CMPLW, type, tisnum);
     }
-    if (ra_hasreg(dest)) emit_fai(as, PPCI_LFD, dest, base, ofs-4);
+    if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
+				  base, ofs-(LJ_SOFTFP?0:4));
   } else {
     if ((ir->op2 & IRSLOAD_TYPECHECK)) {
       asm_guardcc(as, CC_NE);
@@ -1119,6 +1225,7 @@ static void asm_obar(ASMState *as, IRIns
 
 /* -- Arithmetic and logic operations ------------------------------------- */
 
+#if !LJ_SOFTFP
 static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi)
 {
   Reg dest = ra_dest(as, ir, RSET_FPR);
@@ -1146,13 +1253,17 @@ static void asm_fpmath(ASMState *as, IRI
   else
     asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
 }
+#endif
 
 static void asm_add(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
   if (irt_isnum(ir->t)) {
     if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
       asm_fparith(as, ir, PPCI_FADD);
-  } else {
+  } else
+#endif
+  {
     Reg dest = ra_dest(as, ir, RSET_GPR);
     Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
     PPCIns pi;
@@ -1191,10 +1302,13 @@ static void asm_add(ASMState *as, IRIns
 
 static void asm_sub(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
   if (irt_isnum(ir->t)) {
     if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
       asm_fparith(as, ir, PPCI_FSUB);
-  } else {
+  } else
+#endif
+  {
     PPCIns pi = PPCI_SUBF;
     Reg dest = ra_dest(as, ir, RSET_GPR);
     Reg left, right;
@@ -1220,9 +1334,12 @@ static void asm_sub(ASMState *as, IRIns
 
 static void asm_mul(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
   if (irt_isnum(ir->t)) {
     asm_fparith(as, ir, PPCI_FMUL);
-  } else {
+  } else
+#endif
+  {
     PPCIns pi = PPCI_MULLW;
     Reg dest = ra_dest(as, ir, RSET_GPR);
     Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
@@ -1250,9 +1367,12 @@ static void asm_mul(ASMState *as, IRIns
 
 static void asm_neg(ASMState *as, IRIns *ir)
 {
+#if !LJ_SOFTFP
   if (irt_isnum(ir->t)) {
     asm_fpunary(as, ir, PPCI_FNEG);
-  } else {
+  } else
+#endif
+  {
     Reg dest, left;
     PPCIns pi = PPCI_NEG;
     if (as->flagmcp == as->mcp) {
@@ -1563,9 +1683,40 @@ static void asm_bitshift(ASMState *as, I
 		       PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
 #define asm_bror(as, ir)	lua_assert(0)
 
+#if LJ_SOFTFP
+static void asm_sfpmin_max(ASMState *as, IRIns *ir)
+{
+  CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
+  IRRef args[4];
+  MCLabel l_right, l_end;
+  Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
+  Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
+  Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
+  PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE;
+  righthi = (lefthi >> 8); lefthi &= 255;
+  rightlo = (leftlo >> 8); leftlo &= 255;
+  args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
+  args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
+  l_end = emit_label(as);
+  if (desthi != righthi) emit_mr(as, desthi, righthi);
+  if (destlo != rightlo) emit_mr(as, destlo, rightlo);
+  l_right = emit_label(as);
+  if (l_end != l_right) emit_jmp(as, l_end);
+  if (desthi != lefthi) emit_mr(as, desthi, lefthi);
+  if (destlo != leftlo) emit_mr(as, destlo, leftlo);
+  if (l_right == as->mcp+1) {
+    cond ^= 4; l_right = l_end; ++as->mcp;
+  }
+  emit_condbranch(as, PPCI_BC, cond, l_right);
+  ra_evictset(as, RSET_SCRATCH);
+  emit_cmpi(as, RID_RET, 1);
+  asm_gencall(as, &ci, args);
+}
+#endif
+
 static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
 {
-  if (irt_isnum(ir->t)) {
+  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
     Reg dest = ra_dest(as, ir, RSET_FPR);
     Reg tmp = dest;
     Reg right, left = ra_alloc2(as, ir, RSET_FPR);
@@ -1653,7 +1804,7 @@ static void asm_intcomp_(ASMState *as, I
 static void asm_comp(ASMState *as, IRIns *ir)
 {
   PPCCC cc = asm_compmap[ir->o];
-  if (irt_isnum(ir->t)) {
+  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
     Reg right, left = ra_alloc2(as, ir, RSET_FPR);
     right = (left >> 8); left &= 255;
     asm_guardcc(as, (cc >> 4));
@@ -1674,6 +1825,44 @@ static void asm_comp(ASMState *as, IRIns
 
 #define asm_equal(as, ir)	asm_comp(as, ir)
 
+#if LJ_SOFTFP
+/* SFP comparisons. */
+static void asm_sfpcomp(ASMState *as, IRIns *ir)
+{
+  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
+  RegSet drop = RSET_SCRATCH;
+  Reg r;
+  IRRef args[4];
+  args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
+  args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
+
+  for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
+    if (!rset_test(as->freeset, r) &&
+	regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
+      rset_clear(drop, r);
+  }
+  ra_evictset(as, drop);
+  asm_setupresult(as, ir, ci);
+  switch ((IROp)ir->o) {
+  case IR_ULT:
+    asm_guardcc(as, CC_EQ);
+    emit_ai(as, PPCI_CMPWI, RID_RET, 0);
+  case IR_ULE:
+    asm_guardcc(as, CC_EQ);
+    emit_ai(as, PPCI_CMPWI, RID_RET, 1);
+    break;
+  case IR_GE: case IR_GT:
+    asm_guardcc(as, CC_EQ);
+    emit_ai(as, PPCI_CMPWI, RID_RET, 2);
+  default:
+    asm_guardcc(as, (asm_compmap[ir->o] & 0xf));
+    emit_ai(as, PPCI_CMPWI, RID_RET, 0);
+    break;
+  }
+  asm_gencall(as, ci, args);
+}
+#endif
+
 #if LJ_HASFFI
 /* 64 bit integer comparisons. */
 static void asm_comp64(ASMState *as, IRIns *ir)
@@ -1703,19 +1892,36 @@ static void asm_comp64(ASMState *as, IRI
 /* Hiword op of a split 64 bit op. Previous op must be the loword op. */
 static void asm_hiop(ASMState *as, IRIns *ir)
 {
-#if LJ_HASFFI
+#if LJ_HASFFI || LJ_SOFTFP
   /* HIOP is marked as a store because it needs its own DCE logic. */
   int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
   if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
   if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
     as->curins--;  /* Always skip the CONV. */
+#if LJ_HASFFI && !LJ_SOFTFP
     if (usehi || uselo)
       asm_conv64(as, ir);
     return;
+#endif
   } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
     as->curins--;  /* Always skip the loword comparison. */
+#if LJ_SOFTFP
+    if (!irt_isint(ir->t)) {
+      asm_sfpcomp(as, ir-1);
+      return;
+    }
+#endif
+#if LJ_HASFFI
     asm_comp64(as, ir);
+#endif
+    return;
+#if LJ_SOFTFP
+  } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
+      as->curins--;  /* Always skip the loword min/max. */
+    if (uselo || usehi)
+      asm_sfpmin_max(as, ir-1);
     return;
+#endif
   } else if ((ir-1)->o == IR_XSTORE) {
     as->curins--;  /* Handle both stores here. */
     if ((ir-1)->r != RID_SINK) {
@@ -1726,14 +1932,27 @@ static void asm_hiop(ASMState *as, IRIns
   }
   if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
   switch ((ir-1)->o) {
+#if LJ_HASFFI
   case IR_ADD: as->curins--; asm_add64(as, ir); break;
   case IR_SUB: as->curins--; asm_sub64(as, ir); break;
   case IR_NEG: as->curins--; asm_neg64(as, ir); break;
+#endif
+#if LJ_SOFTFP
+  case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
+  case IR_STRTO:
+    if (!uselo)
+      ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
+    break;
+#endif
   case IR_CALLN:
+  case IR_CALLS:
   case IR_CALLXS:
     if (!uselo)
       ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
     break;
+#if LJ_SOFTFP
+  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR:
+#endif
   case IR_CNEWI:
     /* Nothing to do here. Handled by lo op itself. */
     break;
@@ -1797,8 +2016,19 @@ static void asm_stack_restore(ASMState *
     if ((sn & SNAP_NORESTORE))
       continue;
     if (irt_isnum(ir->t)) {
+#if LJ_SOFTFP
+      Reg tmp;
+      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
+      lua_assert(irref_isk(ref));  /* LJ_SOFTFP: must be a number constant. */
+      tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
+      emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
+      if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
+      tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
+      emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
+#else
       Reg src = ra_alloc1(as, ref, RSET_FPR);
       emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
+#endif
     } else {
       Reg type;
       RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
@@ -1811,6 +2041,10 @@ static void asm_stack_restore(ASMState *
       if ((sn & (SNAP_CONT|SNAP_FRAME))) {
 	if (s == 0) continue;  /* Do not overwrite link to previous frame. */
 	type = ra_allock(as, (int32_t)(*flinks--), allow);
+#if LJ_SOFTFP
+      } else if ((sn & SNAP_SOFTFPNUM)) {
+	type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
+#endif
       } else {
 	type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
       }
@@ -1947,14 +2181,15 @@ static Reg asm_setup_call_slots(ASMState
   int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
   asm_collectargs(as, ir, ci, args);
   for (i = 0; i < nargs; i++)
-    if (args[i] && irt_isfp(IR(args[i])->t)) {
+    if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
       if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
     } else {
       if (ngpr > 0) ngpr--; else nslots++;
     }
   if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
     as->evenspill = nslots;
-  return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
+  return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
+					   REGSP_HINT(RID_RET);
 }
 
 static void asm_setup_target(ASMState *as)
